This script analyzes UFC fight odds data.
library(tidyverse)
Load data.
load("./Datasets/df_master.RData")
Get summary.
summary(df_master)
## NAME Date Event City
## Length:5914 Length:5914 Length:5914 Length:5914
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## State Country FightWeightClass Round
## Length:5914 Length:5914 Length:5914 Min. :1.000
## Class :character Class :character Class :character 1st Qu.:1.000
## Mode :character Mode :character Mode :character Median :3.000
## Mean :2.428
## 3rd Qu.:3.000
## Max. :5.000
##
## Method Winner_Odds Loser_Odds Sex
## Length:5914 Length:5914 Length:5914 Length:5914
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## fight_id Result FighterWeight FighterWeightClass
## Min. : 1 Length:5914 Min. :115.0 Length:5914
## 1st Qu.: 740 Class :character 1st Qu.:135.0 Class :character
## Median :1479 Mode :character Median :155.0 Mode :character
## Mean :1479 Mean :163.9
## 3rd Qu.:2218 3rd Qu.:185.0
## Max. :2957 Max. :265.0
##
## REACH SLPM SAPM STRA
## Min. :58.00 Min. : 0.000 Min. : 0.100 Min. :0.0000
## 1st Qu.:69.00 1st Qu.: 2.680 1st Qu.: 2.630 1st Qu.:0.3900
## Median :72.00 Median : 3.440 Median : 3.220 Median :0.4400
## Mean :71.77 Mean : 3.527 Mean : 3.429 Mean :0.4415
## 3rd Qu.:75.00 3rd Qu.: 4.250 3rd Qu.: 4.030 3rd Qu.:0.4900
## Max. :84.00 Max. :11.140 Max. :23.330 Max. :0.8000
## NA's :210
## STRD TD TDA TDD
## Min. :0.0900 Min. : 0.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.5100 1st Qu.: 0.56 1st Qu.:0.2700 1st Qu.:0.5100
## Median :0.5600 Median : 1.21 Median :0.3700 Median :0.6400
## Mean :0.5527 Mean : 1.52 Mean :0.3746 Mean :0.6163
## 3rd Qu.:0.6000 3rd Qu.: 2.16 3rd Qu.:0.5000 3rd Qu.:0.7600
## Max. :0.9200 Max. :14.19 Max. :1.0000 Max. :1.0000
##
## SUBA
## Min. : 0.0000
## 1st Qu.: 0.1000
## Median : 0.4000
## Mean : 0.5555
## 3rd Qu.: 0.8000
## Max. :12.1000
##
Redefine variables.
# Cast every raw column to its analysis type in one pass: categorical fields
# become factors, the fight date becomes a Date, and both odds columns become
# numeric.
df_master <- df_master %>%
  dplyr::mutate(
    NAME = as.factor(NAME),
    Date = as.Date(Date),
    Event = as.factor(Event),
    City = as.factor(City),
    State = as.factor(State),
    Country = as.factor(Country),
    FightWeightClass = as.factor(FightWeightClass),
    Method = as.factor(Method),
    Winner_Odds = as.numeric(Winner_Odds),
    Loser_Odds = as.numeric(Loser_Odds),
    fight_id = as.factor(fight_id),
    Sex = as.factor(Sex),
    Result = as.factor(Result),
    FighterWeightClass = as.factor(FighterWeightClass)
  )
Summarize again… There are infinite odds and overturned / DQ fight outcomes. These will have to be removed.
summary(df_master)
## NAME Date
## Donald Cerrone : 24 Min. :2013-04-27
## Ovince Saint Preux: 21 1st Qu.:2015-08-08
## Jim Miller : 19 Median :2017-04-22
## Derrick Lewis : 18 Mean :2017-06-03
## Neil Magny : 18 3rd Qu.:2019-03-30
## Tim Means : 18 Max. :2020-12-19
## (Other) :5796
## Event City
## UFC Fight Night: Poirier vs. Gaethje: 28 Las Vegas :1222
## UFC Fight Night: Whittaker vs. Till : 28 Abu Dhabi : 210
## UFC 190: Rousey vs Correia : 26 Boston : 124
## UFC 193: Rousey vs Holm : 26 Rio de Janeiro: 124
## UFC 210: Cormier vs. Johnson 2 : 26 Chicago : 118
## UFC 224: Nunes vs. Pennington : 26 Newark : 114
## (Other) :5754 (Other) :4002
## State Country FightWeightClass
## Nevada :1222 USA :3440 Lightweight : 978
## Texas : 256 Brazil : 532 Welterweight : 976
## New York : 252 Canada : 378 Bantamweight : 840
## California: 250 Australia : 236 Featherweight: 712
## Abu Dhabi : 210 United Arab Emirates: 210 Middleweight : 646
## Florida : 176 United Kingdom : 184 Flyweight : 484
## (Other) :3548 (Other) : 934 (Other) :1278
## Round Method Winner_Odds Loser_Odds Sex
## Min. :1.000 DQ : 14 Min. :1.06 Min. :1.07 Female: 754
## 1st Qu.:1.000 KO/TKO :1890 1st Qu.:1.42 1st Qu.:1.77 Male :5160
## Median :3.000 M-DEC : 34 Median :1.71 Median :2.38
## Mean :2.428 Overturned: 20 Mean : Inf Mean : Inf
## 3rd Qu.:3.000 S-DEC : 620 3rd Qu.:2.33 3rd Qu.:3.36
## Max. :5.000 SUB :1052 Max. : Inf Max. : Inf
## U-DEC :2284
## fight_id Result FighterWeight FighterWeightClass
## 1 : 2 Loser :2957 Min. :115.0 Welterweight : 997
## 2 : 2 Winner:2957 1st Qu.:135.0 Lightweight : 984
## 3 : 2 Median :155.0 Bantamweight : 783
## 4 : 2 Mean :163.9 Featherweight: 711
## 5 : 2 3rd Qu.:185.0 Middleweight : 651
## 6 : 2 Max. :265.0 Flyweight : 541
## (Other):5902 (Other) :1247
## REACH SLPM SAPM STRA
## Min. :58.00 Min. : 0.000 Min. : 0.100 Min. :0.0000
## 1st Qu.:69.00 1st Qu.: 2.680 1st Qu.: 2.630 1st Qu.:0.3900
## Median :72.00 Median : 3.440 Median : 3.220 Median :0.4400
## Mean :71.77 Mean : 3.527 Mean : 3.429 Mean :0.4415
## 3rd Qu.:75.00 3rd Qu.: 4.250 3rd Qu.: 4.030 3rd Qu.:0.4900
## Max. :84.00 Max. :11.140 Max. :23.330 Max. :0.8000
## NA's :210
## STRD TD TDA TDD
## Min. :0.0900 Min. : 0.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.5100 1st Qu.: 0.56 1st Qu.:0.2700 1st Qu.:0.5100
## Median :0.5600 Median : 1.21 Median :0.3700 Median :0.6400
## Mean :0.5527 Mean : 1.52 Mean :0.3746 Mean :0.6163
## 3rd Qu.:0.6000 3rd Qu.: 2.16 3rd Qu.:0.5000 3rd Qu.:0.7600
## Max. :0.9200 Max. :14.19 Max. :1.0000 Max. :1.0000
##
## SUBA
## Min. : 0.0000
## 1st Qu.: 0.1000
## Median : 0.4000
## Mean : 0.5555
## 3rd Qu.: 0.8000
## Max. :12.1000
##
How many events does the dataset include?
# Number of distinct events in the data.
dplyr::n_distinct(df_master$Event)
## [1] 258
How many fights?
# Number of distinct fights in the data.
dplyr::n_distinct(df_master$fight_id)
## [1] 2957
Over what time frame?
# Earliest and latest fight date; missing dates are ignored.
range(df_master$Date, na.rm = TRUE)
## [1] "2013-04-27" "2020-12-19"
Make copy for analysis.
# Continue under a name specific to the odds analysis and drop the original
# binding.
df_odds <- df_master
rm(list = "df_master")
Filter out controversial results and infinite odds.
# Keep only fights with an uncontested outcome (neither DQ nor overturned) and
# finite odds on both sides. The conditions are combined with AND.
df_odds <- dplyr::filter(
  df_odds,
  Method != "DQ",
  Method != "Overturned",
  is.finite(Winner_Odds),
  is.finite(Loser_Odds)
)
Get rid of fighter-specifics so that we can spread the data frame. This will give us one fight per row.
# Drop the per-fighter columns (FighterWeight through SUBA), then move each
# fighter's NAME into a column named after their Result ("Winner" / "Loser").
df_odds_short <- df_odds %>%
  dplyr::select(-(FighterWeight:SUBA)) %>%
  tidyr::spread(key = Result, value = NAME)
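How often were the (best) odds equal? Note that `df_odds` holds two rows per fight (one per fighter), so the count below is in fighter rows, i.e. twice the number of fights; the proportion is unaffected.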
mean(df_odds$Winner_Odds == df_odds$Loser_Odds)
## [1] 0.005477576
sum(df_odds$Winner_Odds == df_odds$Loser_Odds)
## [1] 32
Filter out equal odds and identify if Favorite won the fight.
# Drop pick'em fights (equal odds), flag whether the shorter-priced fighter won,
# and compute the profit of a one-unit stake on each side. A winning bet pays
# the winner's decimal odds minus the stake; a losing bet forfeits the stake.
df_odds_short <- df_odds_short %>%
  dplyr::filter(Winner_Odds != Loser_Odds) %>%
  dplyr::mutate(
    Favorite_was_Winner = Winner_Odds < Loser_Odds,
    Favorite_Unit_Profit = ifelse(Favorite_was_Winner, Winner_Odds - 1, -1),
    Underdog_Unit_Profit = ifelse(Favorite_was_Winner, -1, Winner_Odds - 1)
  )
What was the mean unit profit (i.e. ROI) if one bet solely on the Favorite?
mean(df_odds_short$Favorite_Unit_Profit)
## [1] -0.02288468
What was the mean unit profit if one bet solely on the Underdog?
mean(df_odds_short$Underdog_Unit_Profit)
## [1] -0.002137694
What proportion of the time does the Favorite win?
mean(df_odds_short$Favorite_was_Winner)
## [1] 0.6461274
Calculate implied probability of each fight based on odds.
# Implied probability is the reciprocal of the decimal odds. The favorite's odds
# are the winner's odds when the favorite won, and the loser's odds otherwise.
df_odds_short <- df_odds_short %>%
  dplyr::mutate(
    Favorite_Probability =
      1 / ifelse(Favorite_was_Winner, Winner_Odds, Loser_Odds),
    Underdog_Probability =
      1 / ifelse(Favorite_was_Winner, Loser_Odds, Winner_Odds)
  )
Calculate overround for each fight.
NOTE: these odds are the best available odds for each fight / fighter. Therefore, this is not overround in the traditional sense (looking at one particular odds maker).
# Sum of both sides' implied probabilities, and its excess over 1 (overround).
df_odds_short$Total_Probability <-
  df_odds_short$Favorite_Probability + df_odds_short$Underdog_Probability
df_odds_short$Overround <- df_odds_short$Total_Probability - 1
There is very little overround. This is because we are picking the best odds for each fight / fighter. By picking the best odds, we are counteracting the built-in overround of any particular odds-maker (typically around 5% as a rough estimate).
mean(df_odds_short$Overround)
## [1] 0.004323296
mean(df_odds_short$Total_Probability)
## [1] 1.004323
Add year as variable.
# Four-digit year of the fight date, stored as a character string.
df_odds_short$Year <- format(df_odds_short$Date, "%Y")
Create function to graphically assess over performance as a function of several variables. These are not inferential analyses but are instead meant to visualize the data to observe trends for further analysis.
# Gauge how favorites and underdogs performed relative to the win probability
# implied by their odds.
#
# Fights are binned by implied probability (separately for the favorite side
# and the underdog side). For each bin, the observed win rate and the mean unit
# profit (ROI) are computed and compared against the bin midpoint. Diagnostic
# plots are printed as a side effect, favorites first, then underdogs:
#   - variable = NULL: over-performance, calibration (win rate vs. expected),
#     and ROI;
#   - variable given:  over-performance and ROI, coloured by `variable`.
# Only bins with at least `min_bin_size` fights are plotted; every bin is
# returned.
#
# Args:
#   num_bin:      number of equal-width bins, passed to cut().
#   min_bin_size: minimum number of fights a bin needs in order to be plotted.
#   variable:     optional name (a single string) of a column of `data` by
#                 which each bin is additionally split; NULL pools all fights.
#   data:         one-row-per-fight data frame with the columns
#                 Favorite_Probability, Underdog_Probability,
#                 Favorite_was_Winner, Favorite_Unit_Profit and
#                 Underdog_Unit_Profit. Defaults to the script-level
#                 df_odds_short, so existing calls behave as before.
#
# Returns:
#   The favorite rows followed by the underdog rows, with columns
#   Probability_Bin, Dummy (only when `variable` is given), Prop_of_Victory,
#   Size_of_Bin, ROI, Mid_Bin, Over_Performance and Is_Fav.
gauge_over_performance <- function(num_bin = 10, min_bin_size = 30,
                                   variable = NULL, data = df_odds_short) {
  split_by_variable <- !is.null(variable)

  if (split_by_variable) {
    if (length(variable) != 1 || !(variable %in% colnames(data))) {
      stop("`variable` must be the name of a single column in `data`.",
           call. = FALSE)
    }
    # `[[` extracts a plain vector for both base data frames and tibbles.
    data$Dummy <- data[[variable]]
  }

  # Summarise one side (favorite or underdog) per probability bin.
  summarise_side <- function(is_favorite) {
    if (is_favorite) {
      side <- data.frame(
        Probability_Bin = cut(data$Favorite_Probability, num_bin),
        Won = data$Favorite_was_Winner,
        Unit_Profit = data$Favorite_Unit_Profit
      )
    } else {
      side <- data.frame(
        Probability_Bin = cut(data$Underdog_Probability, num_bin),
        Won = !data$Favorite_was_Winner,
        Unit_Profit = data$Underdog_Unit_Profit
      )
    }

    if (split_by_variable) {
      # Assigned with `$<-` so a character column is never turned into a factor.
      side$Dummy <- data$Dummy
      side <- dplyr::group_by(side, Probability_Bin, Dummy)
    } else {
      side <- dplyr::group_by(side, Probability_Bin)
    }

    perf <- dplyr::summarise(
      side,
      Prop_of_Victory = mean(Won),
      Size_of_Bin = dplyr::n(),
      ROI = mean(Unit_Profit)
    )

    # Recover the bin edges from cut()'s "(lower,upper]" labels. The labels are
    # rounded by cut(), so the midpoint is approximate.
    bin_labels <- as.character(perf$Probability_Bin)
    lower <- as.numeric(sub("\\((.+),.*", "\\1", bin_labels))
    upper <- as.numeric(sub("[^,]*,([^]]*)\\]", "\\1", bin_labels))

    perf$Mid_Bin <- (lower + upper) / 2
    perf$Over_Performance <- perf$Prop_of_Victory - perf$Mid_Bin
    perf$Is_Fav <- is_favorite
    perf
  }

  # Print one diagnostic plot of `y_col` (in percent) against the bin midpoint.
  # The dotted reference is y = 0, or the identity line for calibration plots.
  plot_metric <- function(perf, side_title, y_col, y_lab,
                          identity_line = FALSE) {
    shown <- dplyr::filter(perf, Size_of_Bin >= min_bin_size)
    shown$X_Pct <- shown$Mid_Bin * 100
    shown$Y_Pct <- shown[[y_col]] * 100

    mapping <- if (split_by_variable) {
      aes(x = X_Pct, y = Y_Pct, group = Dummy, colour = Dummy)
    } else {
      aes(x = X_Pct, y = Y_Pct)
    }
    reference <- if (identity_line) {
      geom_abline(slope = 1, intercept = 0, linetype = "dotted")
    } else {
      geom_hline(yintercept = 0, linetype = "dotted")
    }

    gg <- ggplot(shown, mapping) +
      geom_point() +
      geom_smooth(se = FALSE) +
      reference +
      ylab(y_lab) +
      xlab("Expected Probability (%)") +
      ggtitle(side_title)
    if (split_by_variable) {
      gg <- gg + labs(color = sprintf("%s", variable))
    }
    print(gg)
  }

  # Print the full set of plots for one side.
  plot_side <- function(perf, side_title) {
    plot_metric(perf, side_title, "Over_Performance", "Over Performance (%)")
    if (!split_by_variable) {
      plot_metric(perf, side_title, "Prop_of_Victory",
                  "Probability of Victory (%)", identity_line = TRUE)
    }
    # Same shape as over-performance; only the y-axis scale really differs.
    plot_metric(perf, side_title, "ROI", "ROI (%)")
  }

  fav_perf <- summarise_side(TRUE)
  plot_side(fav_perf, "Favorites")

  under_perf <- summarise_side(FALSE)
  plot_side(under_perf, "Underdogs")

  rbind(fav_perf, under_perf)
}
Look at how expected performance predicts over performance.
gauge_over_performance(num_bin = 10, min_bin_size = 100, variable = NULL)
Is there any stability across years? Need to reduce minimum bin size to get estimates. As a result, estimates will be more noisy.
gauge_over_performance(num_bin = 10, min_bin_size = 30, variable = "Year")
Does the method of victory affect the relationship between odds and outcome? Reduce number of bins (compared to Year comparison above) to stabilize estimates. Graphs do not tell whole story due to number of data points available across bins.
odds_perf_by_method = gauge_over_performance(num_bin = 5, min_bin_size = 30, variable = "Method")
print(odds_perf_by_method)
## # A tibble: 47 x 8
## # Groups: Probability_Bin [10]
## Probability_Bin Dummy Prop_of_Victory Size_of_Bin ROI Mid_Bin
## <fct> <fct> <dbl> <int> <dbl> <dbl>
## 1 (0.399,0.509] KO/T… 0.5 14 0.0171 0.454
## 2 (0.399,0.509] S-DEC 0.5 2 0 0.454
## 3 (0.399,0.509] SUB 0.714 7 0.42 0.454
## 4 (0.399,0.509] U-DEC 0.565 23 0.137 0.454
## 5 (0.509,0.617] KO/T… 0.530 349 -0.0624 0.563
## 6 (0.509,0.617] M-DEC 0.636 11 0.123 0.563
## 7 (0.509,0.617] S-DEC 0.420 162 -0.255 0.563
## 8 (0.509,0.617] SUB 0.543 197 -0.0432 0.563
## 9 (0.509,0.617] U-DEC 0.554 453 -0.0223 0.563
## 10 (0.617,0.726] KO/T… 0.676 296 0.00368 0.672
## # … with 37 more rows, and 2 more variables: Over_Performance <dbl>,
## # Is_Fav <lgl>
How does the fight finishing method vary with the implied probability of the Vegas odds?
# Number of fights per implied-probability bin, split by finishing method
# (stored in the Dummy column). Mid_Bin is a proportion, so it is scaled by 100
# to agree with the percent axis label and with the other plots in this script.
odds_perf_by_method %>%
  dplyr::filter(Is_Fav) %>%
  ggplot(aes(x = Mid_Bin * 100, y = Size_of_Bin, group = Dummy, color = Dummy)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  ylab("Count") +
  xlab("Implied Probability (%)") +
  ggtitle("Favorites") +
  labs(color = "Method")
odds_perf_by_method %>%
  dplyr::filter(!Is_Fav) %>%
  ggplot(aes(x = Mid_Bin * 100, y = Size_of_Bin, group = Dummy, color = Dummy)) +
  geom_point() +
  geom_smooth(se = FALSE) +
  ylab("Count") +
  xlab("Implied Probability (%)") +
  ggtitle("Underdogs") +
  labs(color = "Method")
Calculate the proportion of fights that end by various methods as a function of implied probability of fight odds.
# Fights per (side, bin): the denominator for the method shares.
total_count <- odds_perf_by_method %>%
  group_by(Is_Fav, Mid_Bin) %>%
  summarise(Total_Count = sum(Size_of_Bin))

# Fights per (side, bin, method): the numerator.
single_count <- odds_perf_by_method %>%
  group_by(Is_Fav, Mid_Bin, Dummy) %>%
  summarise(Count = Size_of_Bin)

# Join on the shared columns and convert counts to within-bin proportions.
method_count_by_odds <- merge(single_count, total_count)
method_count_by_odds <- dplyr::mutate(
  method_count_by_odds,
  Method_Prop = Count / Total_Count
)

# Share of each finishing method by implied probability, favorites.
method_count_by_odds %>%
  dplyr::filter(Is_Fav) %>%
  ggplot(
    aes(x = Mid_Bin * 100, y = Method_Prop * 100, group = Dummy, color = Dummy)
  ) +
  geom_point() +
  geom_smooth(se = FALSE) +
  ylab("Probability of Method (%)") +
  xlab("Implied Probability (%)") +
  ggtitle("Favorites") +
  labs(color = "Method")

# Share of each finishing method by implied probability, underdogs.
method_count_by_odds %>%
  dplyr::filter(!Is_Fav) %>%
  ggplot(
    aes(x = Mid_Bin * 100, y = Method_Prop * 100, group = Dummy, color = Dummy)
  ) +
  geom_point() +
  geom_smooth(se = FALSE) +
  ylab("Probability of Method (%)") +
  xlab("Implied Probability (%)") +
  ggtitle("Underdogs") +
  labs(color = "Method")
Get rid of useless columns.
# Keep only the columns used by the per-fighter analysis (one row per fighter
# per fight).
df_odds_long <- dplyr::select(
  df_odds,
  NAME, Event, Date, Result, Winner_Odds, Loser_Odds
)
Summarize data.
summary(df_odds_long)
## NAME Event
## Donald Cerrone : 24 UFC Fight Night: Poirier vs. Gaethje: 28
## Ovince Saint Preux: 21 UFC Fight Night: Whittaker vs. Till : 28
## Jim Miller : 19 UFC 190: Rousey vs Correia : 26
## Derrick Lewis : 18 UFC 193: Rousey vs Holm : 26
## Neil Magny : 18 UFC 210: Cormier vs. Johnson 2 : 26
## Tim Means : 18 UFC 224: Nunes vs. Pennington : 26
## (Other) :5724 (Other) :5682
## Date Result Winner_Odds Loser_Odds
## Min. :2013-04-27 Loser :2921 Min. : 1.060 Min. : 1.070
## 1st Qu.:2015-08-08 Winner:2921 1st Qu.: 1.420 1st Qu.: 1.770
## Median :2017-04-22 Median : 1.710 Median : 2.380
## Mean :2017-06-01 Mean : 1.975 Mean : 2.811
## 3rd Qu.:2019-03-30 3rd Qu.: 2.300 3rd Qu.: 3.350
## Max. :2020-12-19 Max. :12.990 Max. :14.050
##
Add Fighter Odds column.
# Each row's own odds: the winner's odds on winner rows, the loser's otherwise.
df_odds_long <- dplyr::mutate(
  df_odds_long,
  Fighter_Odds = ifelse(Result == "Winner", Winner_Odds, Loser_Odds)
)
Add Implied Probability column.
# Implied probability (reciprocal of decimal odds), a logical win flag, and the
# implied probability on the logit scale.
df_odds_long <- df_odds_long %>%
  dplyr::mutate(
    Implied_Probability = 1 / Fighter_Odds,
    Won = Result == "Winner",
    Logit_Prob = qlogis(Implied_Probability)
  )
Get performance and odds.
# Per-fighter expected vs. observed performance.
# Exp_Prop / Logit_Exp_Prop: mean implied probability (raw / logit scale).
# Over_Performance / Logit_Over: observed win rate minus expectation on each
# scale. Logit_Over is +/-Inf for fighters who won all or none of their fights,
# because qlogis(1) and qlogis(0) are infinite.
# Back_Trans_Exp: the mean logit expectation mapped back to a probability.
df_odds_long_fighters <- df_odds_long %>%
  dplyr::group_by(NAME) %>%
  dplyr::summarise(
    Exp_Prop = mean(Implied_Probability),
    Logit_Exp_Prop = mean(Logit_Prob),
    Win_Prop = mean(Won),
    N_Fights = dplyr::n(),
    Over_Performance = Win_Prop - Exp_Prop,
    Logit_Over = qlogis(Win_Prop) - Logit_Exp_Prop,
    Back_Trans_Exp = plogis(Logit_Exp_Prop)
  )
Top 10 over-performers with at least 5 fights, where the number of fights is simply the number available in this dataset (see above).
# Largest positive gap between observed win rate and mean implied probability.
dplyr::arrange(
  dplyr::filter(df_odds_long_fighters, N_Fights >= 5),
  dplyr::desc(Over_Performance)
)
# Same ranking on the logit scale.
dplyr::arrange(
  dplyr::filter(df_odds_long_fighters, N_Fights >= 5),
  dplyr::desc(Logit_Over)
)
Top 10 under-performers with at least 5 fights.
# Largest negative gap between observed win rate and mean implied probability.
dplyr::arrange(
  dplyr::filter(df_odds_long_fighters, N_Fights >= 5),
  Over_Performance
)
# Same ranking on the logit scale.
dplyr::arrange(
  dplyr::filter(df_odds_long_fighters, N_Fights >= 5),
  Logit_Over
)
Most heavily favored fighters with at least 5 fights.
# Highest mean implied probability first.
dplyr::arrange(
  dplyr::filter(df_odds_long_fighters, N_Fights >= 5),
  dplyr::desc(Exp_Prop)
)
# Same ranking using the mean of the logit-scale probabilities.
dplyr::arrange(
  dplyr::filter(df_odds_long_fighters, N_Fights >= 5),
  dplyr::desc(Logit_Exp_Prop)
)
Biggest underdogs on average (lowest mean implied probability) among fighters with at least 5 fights.
# Lowest mean implied probability first.
dplyr::arrange(
  dplyr::filter(df_odds_long_fighters, N_Fights >= 5),
  Exp_Prop
)
# Same ranking using the mean of the logit-scale probabilities.
dplyr::arrange(
  dplyr::filter(df_odds_long_fighters, N_Fights >= 5),
  Logit_Exp_Prop
)
Examine odds for specific fighters.
# Per-fighter summary row for Israel Adesanya.
dplyr::filter(df_odds_long_fighters, NAME == "Israel Adesanya")
# Per-fighter summary row for Anthony Smith.
dplyr::filter(df_odds_long_fighters, NAME == "Anthony Smith")